From 7b915d3eaf2802578456359b5e756af0c1246c58 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Fri, 20 Mar 2009 08:59:47 +0000 Subject: [PATCH] xenpm: Add a small scheduler knob "sched_smt_power_savings" The current scheduler only cares about performance, and thus always picks a pCPU from the most idle package. This knob provides another option: pick a pCPU from the least idle package, for users who want a balance between performance and power. Signed-off-by: Yu Ke Signed-off-by: Tian Kevin --- tools/libxc/xc_pm.c | 17 +++++++++++++++++ tools/libxc/xenctrl.h | 2 ++ tools/misc/xenpm.c | 32 ++++++++++++++++++++++++++++++++ xen/common/sched_credit.c | 7 +++++-- xen/common/schedule.c | 9 +++++++++ xen/drivers/acpi/pmstat.c | 11 +++++++++++ xen/include/public/sysctl.h | 10 ++++++---- xen/include/xen/sched.h | 2 ++ 8 files changed, 84 insertions(+), 6 deletions(-) diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c index 918c604010..13342a3121 100644 --- a/tools/libxc/xc_pm.c +++ b/tools/libxc/xc_pm.c @@ -345,3 +345,20 @@ int xc_get_cputopo(int xc_handle, struct xc_get_cputopo *info) return rc; } +/* value: 0 - disable sched_smt_power_savings + 1 - enable sched_smt_power_savings + */ +int xc_set_sched_opt_smt(int xc_handle, uint32_t value) +{ + int rc; + DECLARE_SYSCTL; + + sysctl.cmd = XEN_SYSCTL_pm_op; + sysctl.u.pm_op.cmd = XEN_SYSCTL_pm_op_set_sched_opt_smt; + sysctl.u.pm_op.cpuid = 0; + sysctl.u.pm_op.set_sched_opt_smt = value; + rc = do_sysctl(xc_handle, &sysctl); + + return rc; +} + diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h index 4a30c68fe4..9ce228620a 100644 --- a/tools/libxc/xenctrl.h +++ b/tools/libxc/xenctrl.h @@ -1260,4 +1260,6 @@ struct xc_get_cputopo { int xc_get_cputopo(int xc_handle, struct xc_get_cputopo *info); +int xc_set_sched_opt_smt(int xc_handle, uint32_t value); + #endif /* XENCTRL_H */ diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c index 3540ab2de8..39eef6517f 100644 --- a/tools/misc/xenpm.c +++ b/tools/misc/xenpm.c @@ -56,6 +56,7 @@ void show_help(void) " 
set-up-threshold [cpuid] set up threshold on CPU or all\n" " it is used in ondemand governor.\n" " get-cpu-topology get thread/core/socket topology info\n" + " set-sched-smt enable|disable enable/disable scheduler smt power saving\n" " start [seconds] start collect Cx/Px statistics,\n" " output after CTRL-C or SIGINT or several seconds.\n" ); @@ -838,6 +839,36 @@ void cpu_topology_func(int argc, char *argv[]) return ; } +void set_sched_smt_func(int argc, char *argv[]) +{ + int value, rc; + + if (argc != 1){ + show_help(); + exit(-1); + } + + if ( !strncmp(argv[0], "disable", sizeof("disable")) ) + { + value = 0; + } + else if ( !strncmp(argv[0], "enable", sizeof("enable")) ) + { + value = 1; + } + else + { + show_help(); + exit(-1); + } + + rc = xc_set_sched_opt_smt(xc_fd, value); + printf("%s sched_smt_power_savings %s\n", argv[0], + rc? "failed":"successeed" ); + + return; +} + struct { const char *name; void (*function)(int argc, char *argv[]); @@ -854,6 +885,7 @@ struct { { "set-sampling-rate", scaling_sampling_rate_func }, { "set-up-threshold", scaling_up_threshold_func }, { "get-cpu-topology", cpu_topology_func}, + { "set-sched-smt", set_sched_smt_func}, }; int main(int argc, char *argv[]) diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c index f2c3cde0fd..d724293bc1 100644 --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -387,7 +387,7 @@ csched_cpu_pick(struct vcpu *vc) { cpumask_t cpu_idlers; cpumask_t nxt_idlers; - int nxt; + int nxt, weight_cpu, weight_nxt; nxt = cycle_cpu(cpu, cpus); @@ -404,7 +404,10 @@ csched_cpu_pick(struct vcpu *vc) cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]); } - if ( cpus_weight(cpu_idlers) < cpus_weight(nxt_idlers) ) + weight_cpu = cpus_weight(cpu_idlers); + weight_nxt = cpus_weight(nxt_idlers); + if ( ( (weight_cpu < weight_nxt) ^ sched_smt_power_savings ) + && (weight_cpu != weight_nxt) ) { cpu = nxt; cpu_clear(cpu, cpus); diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 
7ce18b14e9..5e91f6c85d 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -38,6 +38,13 @@ static char opt_sched[10] = "credit"; string_param("sched", opt_sched); +/* If sched_smt_power_savings is set, + * the scheduler gives preference to a partially idle package over + * a fully idle package when picking a pCPU to schedule a vCPU. + */ +int sched_smt_power_savings = 0; +boolean_param("sched_smt_power_savings", sched_smt_power_savings); + #define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */ /* Various timer handlers. */ @@ -942,6 +949,8 @@ void dump_runq(unsigned char key) printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name); SCHED_OP(dump_settings); + printk("sched_smt_power_savings: %s\n", + sched_smt_power_savings? "enabled":"disabled"); printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now); for_each_online_cpu ( i ) diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c index 76e91a4288..1ac35c8237 100644 --- a/xen/drivers/acpi/pmstat.c +++ b/xen/drivers/acpi/pmstat.c @@ -516,6 +516,17 @@ int do_pm_op(struct xen_sysctl_pm_op *op) break; } + case XEN_SYSCTL_pm_op_set_sched_opt_smt: + { + uint32_t saved_value; + + saved_value = sched_smt_power_savings; + sched_smt_power_savings = !!op->set_sched_opt_smt; + op->set_sched_opt_smt = saved_value; + + break; + } + default: printk("not defined sub-hypercall @ do_pm_op\n"); ret = -ENOSYS; diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h index 2c0c5dcc33..48d327c067 100644 --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -348,11 +348,9 @@ struct xen_set_cpufreq_para { uint32_t ctrl_type; uint32_t ctrl_value; -} -; -/* Get physical CPU topology information. */ - +}; +/* Get physical CPU topology information. 
*/ #define INVALID_TOPOLOGY_ID (~0U) struct xen_get_cputopo { /* IN: maximum addressable entry in @@ -381,6 +379,9 @@ struct xen_sysctl_pm_op { /* get CPU topology */ #define XEN_SYSCTL_pm_op_get_cputopo 0x20 + /* set/reset scheduler power saving option */ + #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21 + uint32_t cmd; uint32_t cpuid; union { @@ -389,6 +390,7 @@ struct xen_sysctl_pm_op { struct xen_set_cpufreq_para set_para; uint64_t get_avgfreq; struct xen_get_cputopo get_topo; + uint32_t set_sched_opt_smt; }; }; diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 5978985c42..efaec7e9b7 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -550,6 +550,8 @@ uint64_t get_cpu_idle_time(unsigned int cpu); #define is_hvm_vcpu(v) (is_hvm_domain(v->domain)) #define need_iommu(d) ((d)->need_iommu && !(d)->is_hvm) +extern int sched_smt_power_savings; + extern enum cpufreq_controller { FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen } cpufreq_controller; -- 2.30.2